﻿
************************************************************************************************
* Constructing the data set for "Public Health Policy At Scale.."
************************************************************************************************
* Variables from medical birth records
*************************************************************************************************


* --- Folder locations -------------------------------------------------------
* temp2    : folder that receives the intermediate file saved by this do-file
* rawStata : folder from which the raw birth-record file is read
global temp2 "X:\Data\Workdata\707116\ChildVax\02_temp\Nov2019" 
global rawStata "X:\Data\Workdata\707116\Stata"

* --- Session settings -------------------------------------------------------
* Memory request kept from the original script.
* NOTE(review): recent Stata releases manage memory automatically, so this
* line is presumably a no-op there - confirm for the release in use.
set mem 2g
* Wide log lines, and no "--more--" pauses while output scrolls.
set linesize 255
set more off

**********************************************************************************************
** Main data set - population and variables from medical birth records
*********************************************************************************************

*******************************************************************************************
* The population born 1973-1996 comes from data set LPRMFRLF1996
* Gestational age is only available from 1978
* Birthweight was collected in 500-gram steps before 1978
***************************************************************************************


* Start from an empty session before loading the birth records.
clear all

* Load only the listed columns from the raw birth-record file.
* - The path is quoted so that a folder name containing blanks cannot split
*   the file name.
* - C_BOPKOM was listed twice in the variable list; it is now listed once.
use V_FCPR V_MCPR K_BCPR B_FLERFOLD C_BOPAMT C_BOPKOM C_CIVSTD ///
    C_FODEAMT C_FODEKOM C_FODESTED C_PLAC D_FODTDTO D_SM V_FALDER ///
    V_GRAV V_LANGDE V_MALDER V_SVLANGDE V_VAGT ///
    using "$rawStata\lprmfrlf1996.dta"

* Copies of the identifiers under the names used in the rest of the file.
* NOTE(review): presumably father's ID, mother's ID and the child's own ID -
* confirm against the register documentation.
gen fpnr_mfr = V_FCPR
gen mpnr_mfr = V_MCPR
gen pnr      = K_BCPR

* Every record must carry a date of birth. missing() also catches extended
* missing values (.a-.z), which the comparison "!= ." would let through.
assert !missing(D_FODTDTO)

* --- Calendar components of the date of birth -------------------------------
generate bday = day(D_FODTDTO)
label variable bday "day of birth"
generate bmonth = month(D_FODTDTO)
label variable bmonth "month of birth"

* Leap-day births are moved to 28 February so that mdy() below can build a
* valid date in non-leap years. Note that bday itself is overwritten, so the
* stored day of birth is 28 for these records.
replace bday = 28 if bmonth == 2 & bday == 29

generate byear = year(D_FODTDTO)
label variable byear "year of birth"

* --- 1st, 5th and 12th birthdays as daily dates -----------------------------
* Same day and month as the birth date, shifted forward by the given number
* of years; displayed as day-month-year.
foreach age in 1 5 12 {
    local suffix = cond(`age' == 1, "st", "th")
    generate bday_`age'yr = mdy(bmonth, bday, byear + `age')
    label variable bday_`age'yr "`age'`suffix' birthday"
    format bday_`age'yr %tdD_m_Y
}

******** gestational age ****************************
****** only information on cohorts 1978-1996
rename V_SVLANGDE gage_wks

* Implausible gestational ages (below 22 or above 50 weeks) become missing.
* Stata orders missing values above every number, so the "> 50" branch also
* turns any extended missing code into plain ".".
replace gage_wks = . if gage_wks < 22 | gage_wks > 50

* Share of records without gestational age, by birth year.
* tabstat replaces the former "table byear, c(mean flag)": the contents()
* option of -table- no longer exists from Stata 17 on, whereas tabstat runs
* unchanged in older and newer releases. nototal keeps the output to one row
* per birth year, as before.
gen flag = missing(gage_wks)
tabstat flag, by(byear) statistics(mean) nototal
drop flag

** Preterm according to WHO:
** extreme preterm: <28 weeks/196 d, very preterm: 28 - <32 weeks/196-224 d,
** moderate to late preterm: <37 weeks/259 d

* ltpreterm: 1 if born before 37 weeks, 0 otherwise, missing when gestational
* age is missing.
gen ltpreterm = (gage_wks < 37) if !missing(gage_wks)

** Pre-term is all before 37 completed weeks (259 days)
* fullterm37: 1 if born at 37 weeks or later, 0 otherwise, missing when
* gestational age is missing.
gen fullterm37 = (gage_wks >= 37) if !missing(gage_wks)

* --- Birthweight -------------------------------------------------------------
rename V_VAGT birthweight

* Values of 0 and values above 9000 become missing (the "> 9000" branch also
* covers every missing code, since missing sorts above all numbers).
replace birthweight = . if birthweight == 0 | birthweight > 9000
* The value 6000 is additionally blanked for birth year 1977.
* NOTE(review): the reason is not documented in this file - confirm.
replace birthweight = . if birthweight == 6000 & byear == 1977

* Range of birthweight and number of missing values, by birth year.
* tabstat is used instead of "table byear, c(min ... max ...)" because the
* contents() option of -table- no longer exists from Stata 17 on; tabstat
* works in older and newer releases alike.
tabstat birthweight, by(byear) statistics(min max) nototal
gen flag = missing(birthweight)
table byear flag
drop flag

*low birth weight*
* LBW: 1 if birthweight is below 2500, 0 otherwise, missing when birthweight
* is missing.
gen LBW = (birthweight < 2500) if !missing(birthweight)

* VLBW: same construction with a threshold of 1500.
gen VLBW = (birthweight < 1500) if !missing(birthweight)

* Share of LBW and VLBW births by birth year.
tabstat LBW VLBW, by(byear) statistics(mean) nototal

****** Mother's age at birth ********************
generate mage = V_MALDER
summarize mage

* Singleton vs. multiple births
* multibirth = number of records that share the same mpnr_mfr and the same
* date of birth; a value of 1 means no other record shares both.
* The count is taken before records without pnr are dropped further down.
* NOTE(review): records whose mpnr_mfr is missing would be grouped together
* when they share a birth date - confirm mpnr_mfr is always filled.
sort mpnr_mfr D_FODTDTO
by mpnr_mfr D_FODTDTO: generate multibirth = _N
tabulate multibirth // this looks fine too

* Drop records that have no value in pnr.
drop if pnr == ""

* Keep the identifiers and the variables constructed above
* (fullterm* matches every variable whose name starts with "fullterm").
keep pnr fpnr_mfr mpnr_mfr ///
     bday bmonth byear bday_1yr bday_5yr bday_12yr ///
     birthweight LBW VLBW ///
     gage_wks ltpreterm fullterm* ///
     mage multibirth C_CIVSTD

sort pnr

* The path is quoted so that a folder name containing blanks cannot split
* the file name.
save "$temp2\lpr1996.dta", replace

